# Replication file for:
#
#      "Do political preferences affect policy learning and uptake? 
#       Evidence from a field experiment with informal entrepreneurs"
#
# Corresponding author: 
#       
#      Cesar Zucco
#      cesar.zucco@fgv.br or cesar.zucco@gmail.com
#
# Data Files:
#
# This code requires the following two data files:
# (1) Data_Complete_Brazil.RData: main data file for the project
# (2) Brazil-analyze-Endorsement-TOC.csv: file with the line of this code that generates
#                                         each of the objects in the paper
#
# Instructions: 
#
# Place both files in the same folder
# Change path in line 38, below
# Run/Source
# Figures and tables will be saved in  subfolder  ReplicationFigures 
# Output included in text will be printed to the screen 
# See Readme for details

library(plyr)
library(haven)
library(tidyverse)
library(data.table)
library(estimatr)
library(DeclareDesign)
library(randomizr)
library(xtable)
library(mice)
sessionInfo()

setwd("~/Dropbox/Data/Metaketa") # change this to file 
if (file.exists("ReplicFigures")==F){ dir.create(file.path("ReplicFigures"))} 

# Print a "TOC" indicating the line in the code for each object
toc <- read.csv("Brazil-analyze-Endorsement-TOC.csv",sep=",")
print(toc)

# Load the processed data file and subset to keep only
# the treatment arms analyzed in the paper
load(file="ProcessedData/Data_Complete_Brazil.RData")
ed <- subset(dbrazil,Z==0|Z==1|Z==5)
ed$Z <- factor(ed$Z)

# Differentiate between compliance with T1 and with T5
ed$D[which(ed$Z==5&ed$D==1)] <- 5
ed$D <- factor(ed$D)
print(table(D=ed$D,Z=ed$Z))

# Write in probabilities of assignment (stable across all blocks)
ed$Z_prob <- as.numeric(car::recode(ed$Z,"0=.375;1=.375;5=.250"))

# Replace -999,-998, and -997 values with NA's
ed[ed == -997|ed == -998|ed==-999] <- NA

## Functions #################################################

## Declare functions used in the analysis ##
## These are set up simply to speed up estimation of each model for all outcome variables 
my.summary <- function(index, the.data=ed){
  out <- as.data.frame(matrix(NA, 1, 6))
  tmp <- summary(the.data[,outcomes[index]],na.rm=T)
  out[1,1] <- tmp["Mean"]
  out[1,2] <- sd(the.data[,outcomes[index]],na.rm=T)
  out[1,3] <- tmp["Min."]
  out[1,4] <- tmp["Max."]
  out[1,5] <- tmp["NA's"]
  out[1,6] <- length(table(the.data[,outcomes[index]]))
  names(out) <- c("Mean","SD","Min","Max","Missing","Type") 
  return(out)
}

est_dm <- function(index, treat = "Z", the.data=ed){
  require("ANOVAreplication")
  results_t <- as.data.frame(matrix(NA, 1, 16))
  the.formula<-formula(paste(outcomes[index],"~Z"))
  dm1 <- difference_in_means(the.formula, data =  the.data, condition1=0,condition2=1)
  dm5 <- difference_in_means(the.formula, data =  the.data, condition1=0,condition2=5)
  dm51 <- difference_in_means(the.formula, data =  the.data, condition1=5,condition2=1)
  results_t[1:7] <- round(summary(dm1)$coef,4)
  results_t[8:14] <- round(summary(dm5)$coef,4)
  results_t[15] <- round(summary(dm51)$coef[,4],4)
  results_t[16] <- dm1$nobs
  names(results_t) <- c(paste(colnames(summary(dm1)$coef),1,sep="."),
                        paste(colnames(summary(dm5)$coef),5,sep="."),"p.T5vT1","N")
  return(results_t)
}


#Similar, but this variant is for attrition balance
est_attr <- function(index, treat = "attrition",the.data=ed,w=NULL,std=F){
  require("ANOVAreplication")
  results_t <- as.data.frame(matrix(NA, 1, 8))
  the.formula<-formula(paste(outcomes[index],"~",treat))
  dm <- difference_in_means(the.formula, data = the.data,alpha = 0.1,weights=w)
  results_t[1:7] <- summary(dm)$coef
  results_t[8] <- dm$nobs
  names(results_t) <- c(colnames(summary(dm)$coef),"N")
  if(std==T){#compute standardized mean differences
    tmp <- data.frame(y = the.data[,outcomes[index]], g = the.data[,treat])
    pool_sd <- pooled.sd(na.omit(tmp))
    results_t <- cbind(results_t,'Std. Estimate'=round(results_t[1,1]/pool_sd,5))
  }
  return(results_t)
}

est_itt <- function(index, treat = "Z", ctrls=NULL, fe=F, baseline=F,the.data=ed){
  results_t <- as.data.frame(matrix(NA, 1, 15))
  if(is.null(ctrls)){ 
    the.formula <- formula(paste(outcomes[index]," ~ Z"))
  }else{
    if(baseline==F){ the.formula <- formula(paste(outcomes[index],"~ Z +",paste(ctrls,collapse="+")))}
    if(baseline==T){ the.formula <- formula(paste(outcomes[index],"~ Z +",paste(ctrls,collapse="+"),"+",baselines[index])) }
  }
  if(fe){  
    reg <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = the.data)
  }else{reg <- lm_robust(the.formula, data = the.data, se_type="stata")
  }
  results_t[1:7] <- round(summary(reg)$coef["Z1",],4)
  results_t[8:14] <- round(summary(reg)$coef["Z5",],4)
  results_t[15] <- reg$nobs
  names(results_t) <- c(paste(colnames(summary(reg)$coef),1,sep="."),
                        paste(colnames(summary(reg)$coef),5,sep="."),"N")
  return(results_t)
}


est_cace <- function(index, treat = "Z", ctrls=NULL, fe=F, baseline=F, inst="D",the.data=ed){
  if(index==1){cat("Separate IV estimation for Z1 and Z5 relative to Z0!\n")}
  results_t <- as.data.frame(matrix(NA, 1, 15))
  if(is.null(ctrls)){ 
    the.formula <- formula(paste(outcomes[index]," ~ D  | Z"))
  }else{
    if(baseline==F){ the.formula <- formula(paste(outcomes[index],"~ D +",paste(ctrls,collapse="+"),"| Z +",paste(ctrls,collapse="+")))}
    if(baseline==T){ the.formula <- formula(paste(outcomes[index],"~ D +",paste(ctrls,collapse="+"),"+",baselines[index],"| Z +",paste(ctrls,collapse="+"),"+",baselines[index])) 
    }
  }
  if(fe){  
        regZ1 <- iv_robust(the.formula, se_type="stata", fixed_effects= block, data = subset(the.data,Z==0|Z==1))
        regZ5 <- iv_robust(the.formula, se_type="stata", fixed_effects= block, data = subset(the.data,Z==0|Z==5))
  }else{regZ1 <- iv_robust(the.formula, data = subset(the.data,Z==0|Z==1), se_type="stata")
        regZ5 <- iv_robust(the.formula, data = subset(the.data,Z==0|Z==5), se_type="stata")
  }
  results_t[1:7] <- round(summary(regZ1)$coef["D1",],4)
  results_t[8:14] <- round(summary(regZ5)$coef["D5",],4)
  results_t[15] <- regZ5$nobs
  names(results_t) <- c(paste(colnames(summary(regZ1)$coef),"1",sep="."),
                              paste(colnames(summary(regZ5)$coef),"5",sep="."),"N.5")
  return(results_t)
}


# Moderation (interactions with treatment indicator)
est_mod <- function(index, treat = "Z1", ctrls=NULL, fe=F, mod=NULL,the.data=ed){
  if(is.null(mod)){stop("Error! Need to specify moderator variable\n")}
  results_t <- as.data.frame(matrix(NA, 1, 21))
  if(is.null(ctrls)){ 
    the.formula <- formula(paste(outcomes[index]," ~ Z *",mod))
  }else{
    the.formula <- formula(paste(outcomes[index],"~ Z *",mod,"+",paste(ctrls,collapse="+")))
  }
  if(fe){  
        reg <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = the.data,alpha = 0.1)
  }else{reg <- lm_robust(the.formula, data = the.data, se_type="stata",alpha = 0.1)
  }
  results_t[1:7] <- round(summary(reg)$coef[treat,],5)
  results_t[8:14] <- round(summary(reg)$coef[grep(paste(treat,":",sep=""),names(coef(reg))),],5) #interaction term
  results_t[15] <- reg$nobs
  #marginal effects with high and low values of the moderator 
  if(length(unique(the.data[,mod]))<=5){
    mod.vals <- range(the.data[,mod],na.rm=T)
  }else{  
    mod.vals <- quantile(the.data[,mod],probs=c(0.1,0.9),na.rm=T)  
  }
  p.data <- the.data
  p.data[,mod] <- p.data[,mod]-mod.vals[1] #center data on low value and reestimate
  if(fe){  
    reg1 <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = p.data,alpha = 0.1)
  }else{
    reg1 <- lm_robust(the.formula, data = p.data, se_type="stata",alpha = 0.1)
  }
  p.data <- the.data
  p.data[,mod] <- p.data[,mod]-mod.vals[2] #center data on high value and reestiamte
  if(fe){  
    reg2 <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = p.data,alpha = 0.1)
  }else{
    reg2 <- lm_robust(the.formula, data = p.data, se_type="stata",alpha = 0.1)
  }
  results_t[16] <- summary(reg1)$coef[treat,"Estimate"]
  results_t[17] <- summary(reg1)$coef[treat,"Std. Error"]
  results_t[18] <- summary(reg1)$coef[treat,"Pr(>|t|)"]
  results_t[19] <- summary(reg2)$coef[treat,"Estimate"]
  results_t[20] <- summary(reg2)$coef[treat,"Std. Error"]
  results_t[21] <- summary(reg2)$coef[treat,"Pr(>|t|)"]
  names(results_t) <- c(colnames(summary(reg)$coef),paste("I",colnames(summary(reg)$coef),sep="."),"N",
                        "mod.pel","mod.sel","mod.pl","mod.peh","mod.seh","mod.ph")
  return(results_t)
}


# Moderation (interactions with treatment indicator with low/high discrete regression
est_mod2 <- function(index, treat = "Z1", ctrls=NULL, fe=F, mod=NULL,the.data=ed){
  if(is.null(mod)){stop("Error! Need to specify moderator variable\n")}
  results_t <- as.data.frame(matrix(NA, 1, 21))
  the.data$mod <- the.data[,mod] 
  if(length(unique(na.omit(the.data[,mod])))>2){
      cat("Dichotomizing continuous moderator at mean\n")
      the.data$mod <- as.numeric(the.data$mod > median(the.data$mod,na.rm=T))}
  if(is.null(ctrls)){ 
    the.formula <- formula(paste(outcomes[index]," ~ Z * mod"))
  }else{
    the.formula <- formula(paste(outcomes[index],"~ Z * mod +",paste(ctrls,collapse="+")))
  }
  if(fe){  
    reg <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = the.data,alpha = 0.1)
  }else{reg <- lm_robust(the.formula, data = the.data, se_type="stata",alpha = 0.1)
  }
  results_t[1:7] <- round(summary(reg)$coef[treat,],4)
  results_t[8:14] <- round(summary(reg)$coef[grep(paste(treat,":",sep=""),names(coef(reg))),],4) #interaction term
  results_t[15] <- reg$nobs
  #marginal effects with high and low values of the moderator 
  if(is.null(ctrls)){
    the.formula <- formula(paste(outcomes[index]," ~ Z"))
  }else{
    the.formula <- formula(paste(outcomes[index],"~ Z  +",paste(ctrls,collapse="+")))
  }
  if(fe){
    reg1 <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = subset(the.data,mod==0),alpha = 0.1)
    reg2 <- lm_robust(the.formula, se_type="stata", fixed_effects= ~ block, data = subset(the.data,mod==1),alpha = 0.1)
  }else{
    reg1 <- lm_robust(the.formula, data = subset(the.data,mod==0), se_type="stata",alpha = 0.1)
    reg2 <- lm_robust(the.formula, data = subset(the.data,mod==1), se_type="stata",alpha = 0.1)
  }
  results_t[16] <- round(summary(reg1)$coef[treat,"Estimate"],4)
  results_t[17] <- round(summary(reg1)$coef[treat,"Std. Error"],4)
  results_t[18] <- round(summary(reg1)$coef[treat,"Pr(>|t|)"],4)
  results_t[19] <- round(summary(reg2)$coef[treat,"Estimate"],4)
  results_t[20] <- round(summary(reg2)$coef[treat,"Std. Error"],4)
  results_t[21] <- round(summary(reg2)$coef[treat,"Pr(>|t|)"],4)
  names(results_t) <- c(colnames(summary(reg)$coef),paste("I",colnames(summary(reg)$coef),sep="."),"N",
                        "mod.pel","mod.sel","mod.pl","mod.peh","mod.seh","mod.ph")
  return(results_t)
}


est_mediation <- function(index, treat = "Zcomb", ctrls=NULL, fe=F, mediator="know_meigeneral_e",the.data=ed){
  require(mediation)
  res <- as.data.frame(matrix(NA, 1, 8
                              ,dimnames = list(outcomes[index],c("ACME","ACMEp","ADE","ADEp","Total","Totalp","PropMed","ProbMedp"))))
  the.data$treat <- as.numeric(the.data[,treat]!=0)
  the.data$mediator <- as.numeric(the.data[,mediator])
  the.data$outcome <- the.data[,outcomes[index]]
  the.data <- na.omit(subset(the.data,select=c(mediator,treat,outcome)))
  model.m <- lm(mediator ~ treat,data=the.data )
  model.y <- lm(formula(outcome ~ treat + mediator),data=the.data )
  set.seed(1977)
  out <- mediate(model.m, model.y, sims=1000, boot=TRUE,treat="treat",mediator="mediator")
  res[1] <-  out$d0
  res[2] <-  out$d0.p
  res[3] <-  out$z0
  res[4] <-  out$z0.p
  res[5] <-  out$tau.coef
  res[6] <-  out$tau.p
  res[7] <-  out$n0
  res[8] <-  out$n0.p
  return(res)
}


est_modmediation <- function(index,  treat="Z5", moderator="lula1bin_b",mediator="know_meigeneral_e",the.data=ed){
  require(mediation)
  res <- as.data.frame(matrix(NA, 1, 16
                              ,dimnames = list(outcomes[index],c("ACMEl","ACMEpl","ADEl","ADEpl","Totall","Totalpl","PropMedl","ProbMedpl",
                                                                 "ACMEh","ACMEph","ADEh","ADEph","Totalh","Totalph","PropMedh","ProbMedph"))))
  if(treat=="Z5"){
    the.data <- subset(the.data,Z!=1) 
    if(index==1){cat("Using only Z=0 and Z=5\n")}
  }
  if(treat=="Z1"){
    the.data <- subset(the.data,Z!=5)
    if(index==1){cat("Using only Z=0 and Z=1\n")}
  }
  treat = "Zcomb"
  the.data$mediator <- the.data[,mediator]
  the.data$treat <- the.data[,treat]
  the.data$outcome <- the.data[,outcomes[index]]
  the.data$moderator <- the.data[,moderator]
  the.data <- na.omit(subset(the.data,select=c(mediator,moderator,treat,outcome)))
  model.m <- lm(mediator~ treat*moderator,data=the.data )
  model.y <- lm(outcome~ treat*moderator + mediator,data=the.data )
  set.seed(1976)
  out0 <- mediate(model.m, model.y, sims=1000, treat="treat", mediator="mediator",
                  covariates = list(moderator = 0))
  out1 <- mediate(model.m, model.y, sims=1000, treat="treat", mediator="mediator",
                  covariates = list(moderator = 1))
  res[1] <-  out0$d0
  res[2] <-  out0$d0.p
  res[3] <-  out0$z0
  res[4] <-  out0$z0.p
  res[5] <-  out0$tau.coef
  res[6] <-  out0$tau.p
  res[7] <-  out0$n0
  res[8] <-  out0$n0.p
  res[9] <-  out1$d0
  res[10] <- out1$d0.p
  res[11]<-  out1$z0
  res[12]<-  out1$z0.p
  res[13]<-  out1$tau.coef
  res[14]<-  out1$tau.p
  res[15]<-  out1$n0
  res[16]<-  out1$n0.p
  return(res)
}

## Recoding #################################################

# Rescale income 
ed$income_survey_b <- log(1+ed$income_survey_b)
ed$income_survey_e <- log(1+ed$income_survey_e)

# Transform ptID and Lula to binary  
ed$ptIDbin_b <- car::recode(ed$ptID_b,"c(0,1)=1;-1=0")
ed$lula1bin_b <- car::recode(ed$lula1_b,"c(-2,-1)=0;c(1,2)=1")

# Transform know_meifeatures to binary
ed$know_meifeaturesraw_e <- ed$know_meifeatures_e
ed$know_meifeatures_e <- as.numeric(ed$know_meifeatures_e > -0.5)

# Combine expectations ex-ante to create "correct expectations"
# Correct cost was between 48,70  53,70, depending on business characteristics
ed$expect_correct <- ed$expect_cost_b>18.7&
                            ed$expect_cost_b<83.7
ed$expect_correct[which(ed$expect_cost_b==-999)]<-NA

# Redefine taxcompliance so that individuals who did not formalize are coded as 0s 
# instead of NAs. This is because "taxcompliance" is only possible for those who formalize
ed$taxcompliance_admin_e[which(ed$formalize_admin_e==0)]<-0


# Create indices ################################################
# Follows Metaketa II metaanalysis code 
# (except that we code cases in which
# all constituent items are missing as missing for the index)

    
#Index of Trust in Government (Endline)
tmp <- subset(ed,select=c(trust2_survey_e,
                          trust3_survey_e,
                          trust4_survey_e,
                          trust6_survey_e,
                          trust_other2_survey_e))
no.info <- which(apply(is.na(tmp),1,sum,na.rm=T)==ncol(tmp))
# Impute missing values
ed <- ed %>%
  split(.$country) %>%
  map_df(function(x) {
    predictor.matrix = matrix(data = c(rep(1, 25)),
                              ncol = 5,
                              byrow = TRUE)
    diag(predictor.matrix) = 0
    x %>%
      dplyr::select(trust2_survey_e,
                    trust3_survey_e,
                    trust4_survey_e,
                    trust6_survey_e,
                    trust_other2_survey_e) %>%
      mice(
        data = .,
        m = 1,
        method = "norm.predict",
        predictorMatrix = predictor.matrix
      ) %>%
      mice::complete(.) %>%
      cbind(
        .,
        x %>% dplyr::select(
          -trust2_survey_e,
          -trust3_survey_e,-trust4_survey_e,
          -trust6_survey_e,-trust_other2_survey_e
        )
      )
  })
#Remove cases for which there are no items (don't impute those)
ed$trust2_survey_e[no.info]<-NA
ed$trust3_survey_e[no.info]<-NA
ed$trust4_survey_e[no.info]<-NA
ed$trust6_survey_e[no.info]<-NA
ed$trust_other2_survey_e[no.info]<-NA
# Making the index:
ed <- ed %>%
  split(.$country) %>%
  # Standardize each component/measure
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(
                trust2_survey_e,
                trust3_survey_e,
                trust4_survey_e,
                trust6_survey_e,
                trust_other2_survey_e
              ),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  }) %>%
  dplyr::select(trust2_survey_e,
                trust3_survey_e,
                trust4_survey_e,
                trust6_survey_e,
                trust_other2_survey_e) %>%
  (function(x) {
    names(x) = paste0("std_", names(x))
    x
  }) %>%
  # Average all z scores for an individual
  mutate(index_trustgovt_e = pmap_dbl(., ~ mean(c(...), na.rm = T))) %>%
  cbind(ed, .) %>% 
  # Re-standardize that single zscore by country control mean and standard deviation
  split(.$country) %>%
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(index_trustgovt_e),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  })


#Index of Trust in Civil Society (Endline)
tmp <- subset(ed,select=c(trust5_survey_e, trust_other1_survey_e))
no.info <- which(apply(is.na(tmp),1,sum,na.rm=T)==ncol(tmp))
# Impute missing values
ed <- ed %>%
  split(.$country) %>%
  map_df(function(x) {
    predictor.matrix = matrix(data = c(rep(1, 4)),
                              ncol = 2,
                              byrow = TRUE)
    diag(predictor.matrix) = 0
    x %>%
      dplyr::select(trust5_survey_e, trust_other1_survey_e) %>%
      mice(
        data = .,
        m = 1,
        method = "norm.predict",
        predictorMatrix = predictor.matrix
      ) %>%
      mice::complete(.) %>%
      cbind(., x %>% dplyr::select( -trust5_survey_e, -trust_other1_survey_e))
  })
#Remove cases for which there are no items (don't impute those)
ed$trust5_survey_e[no.info]<-NA
ed$trust_other1_survey_e[no.info]<-NA
#Assemble index
ed <- ed %>%
  split(.$country) %>%
  # Standardize each component/measure
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(trust5_survey_e, trust_other1_survey_e),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  }) %>%
  dplyr::select( trust5_survey_e, trust_other1_survey_e) %>%
  (function(x) {
    names(x) = paste0("std_", names(x))
    x
  }) %>%
  # Average all z scores for an individual
  mutate(index_trustcivil_e = pmap_dbl(., ~ mean(c(...), na.rm = T))) %>%
  cbind(ed, .) %>% 
  # Re-standardize that single zscore by country control mean and standard deviation
  split(.$country) %>%
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(index_trustcivil_e),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  })
ed <- ed[,-grep("std_",names(ed))]#cleanup

#Index of Trust in Government (Baseline)
tmp <- subset(ed,select=c(trust2_survey_b,
                          trust3_survey_b,
                          trust4_survey_b,
                          trust6_survey_b,
                          trust_other2_survey_b))
no.info <- which(apply(is.na(tmp),1,sum,na.rm=T)==ncol(tmp))
# Impute missing values
ed <- ed %>%
  split(.$country) %>%
  map_df(function(x) {
    predictor.matrix = matrix(data = c(rep(1, 25)),
                              ncol = 5,
                              byrow = TRUE)
    diag(predictor.matrix) = 0
    x %>%
      dplyr::select(trust2_survey_b,
                    trust3_survey_b,
                    trust4_survey_b,
                    trust6_survey_b,
                    trust_other2_survey_b) %>%
      mice(
        data = .,
        m = 1,
        method = "norm.predict",
        predictorMatrix = predictor.matrix
      ) %>%
      mice::complete(.) %>%
      cbind(
        .,
        x %>% dplyr::select(
          -trust2_survey_b,
          -trust3_survey_b,-trust4_survey_b,
          -trust6_survey_b,-trust_other2_survey_b
        )
      )
  })
#Remove cases for which there are no items (don't impute those)
ed$trust2_survey_b[no.info]<-NA
ed$trust3_survey_b[no.info]<-NA
ed$trust4_survey_b[no.info]<-NA
ed$trust6_survey_b[no.info]<-NA
ed$trust_other2_survey_b[no.info]<-NA
# Making the index:
ed <- ed %>%
  split(.$country) %>%
  # Standardize each component/measure
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(
                trust2_survey_b,
                trust3_survey_b,
                trust4_survey_b,
                trust6_survey_b,
                trust_other2_survey_b
              ),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  }) %>%
  dplyr::select(trust2_survey_b,
                trust3_survey_b,
                trust4_survey_b,
                trust6_survey_b,
                trust_other2_survey_b) %>%
  (function(x) {
    names(x) = paste0("std_", names(x))
    x
  }) %>%
  # Average all z scores for an individual
  mutate(index_trustgovt_b = pmap_dbl(., ~ mean(c(...), na.rm = T))) %>%
  cbind(ed, .) %>% 
  # Re-standardize that single zscore by country control mean and standard deviation
  split(.$country) %>%
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(index_trustgovt_b),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  })

#Index of Trust in Civil Society (Baseline)
tmp <- subset(ed,select=c(trust5_survey_b, trust_other1_survey_b))
no.info <- which(apply(is.na(tmp),1,sum,na.rm=T)==ncol(tmp))
# Impute missing values
ed <- ed %>%
  split(.$country) %>%
  map_df(function(x) {
    predictor.matrix = matrix(data = c(rep(1, 4)),
                              ncol = 2,
                              byrow = TRUE)
    diag(predictor.matrix) = 0
    x %>%
      dplyr::select(trust5_survey_b, trust_other1_survey_b) %>%
      mice(
        data = .,
        m = 1,
        method = "norm.predict",
        predictorMatrix = predictor.matrix
      ) %>%
      mice::complete(.) %>%
      cbind(., x %>% dplyr::select( -trust5_survey_b, -trust_other1_survey_b))
  })
#Remove cases for which there are no items (don't impute those)
ed$trust5_survey_b[no.info]<-NA
ed$trust_other1_survey_b[no.info]<-NA
#Assemble index
ed <- ed %>%
  split(.$country) %>%
  # Standardize each component/measure
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(trust5_survey_b, trust_other1_survey_b),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  }) %>%
  dplyr::select( trust5_survey_b, trust_other1_survey_b) %>%
  (function(x) {
    names(x) = paste0("std_", names(x))
    x
  }) %>%
  # Average all z scores for an individual
  mutate(index_trustcivil_b = pmap_dbl(., ~ mean(c(...), na.rm = T))) %>%
  cbind(ed, .) %>% 
  # Re-standardize that single zscore by country control mean and standard deviation
  split(.$country) %>%
  map_df(function(x) {
    Z = x$Z
    mutate_at(x,
              vars(index_trustcivil_b),
              function(x)
                (x - mean(x[Z == 0], na.rm = T)) / sd(x[Z == 0], na.rm = T))
  })
ed <- ed[,-grep("std_",names(ed))]#cleanup

# More recoding using imputed variables 
ed$trust_fedgov_b <- as.numeric(ed$trust2_survey_b>1)
ed$trust_fedgov_b[which(is.na(ed$trust2_survey_b))]<-0 #missing responses at baseline are DK, which is no trust
ed$trust_fedgov_e <- as.numeric(ed$trust2_survey_e>1)


# Attrition ###########################################
# Compare attrition indicators
table(Survey=ed$attrition,Admin=ed$attrition_admin)
# Compliance vs. non-attriters and attriters
table(D=ed$D,SurveyAttr=ed$attrition,Z=ed$Z)
# Compliance vs.  non-attriters and attriters
table(D=ed$D,AdminAttr=ed$attrition_admin,Z=ed$Z)

# Examine attrition (endline survey)
# Table B2a (Endline Survey)
reg_at1 <- lm_robust(attrition~Z,se_type="stata",data=ed)  
print(xtable(summary(reg_at1)$coefficients[,c(1,2,4)]),digits=3,
      caption="Attrition on Endline Survey",label="tab:at1",
      type="latex",hline.after=c(-1,1,3),caption.placement="top",
      file=paste("ReplicFigures/tab-B2a.tex",sep=""))

# Examine attrition (administrative data)
# Table B2b (Administrative Data) 
reg_at2 <- lm_robust(attrition_admin~Z,se_type="stata",data=ed)  
print(xtable(summary(reg_at2)$coefficients[,c(1,2,4)]),digits=3,
      caption="Attrition on Administrative Outcomes",label="tab:at2",
      type="latex",hline.after=c(-1,1,3),caption.placement="top",
      file=paste("ReplicFigures/tab-B2b.tex",sep=""))
# Somewhat lower attrition among the treated for admin data
# Makes sense because we had two opportunities to collect CPF data from those 
# who were treated, and only one from those in the control group
# Examine balance between attrited and non-attried on baseline variables
abal <- c("access_survey_b",
          "access_publicse_survey_b",
          "rses_b",
          "life_b",
          "intpertrust_b",
          "entrepreneurship_b",
          "managerialstyle_b",
          "female_survey_b",
          "age_survey_b",
          "education_survey_b",
          "income_survey_b")

abal.labels <- c("Access (Index)","Access (Service)","Self Esteem",
                 "Life Satisfaction","Interpersonal Trust","Entrepreneurship",
                 "Managerial Style","Gender (Female)","Age","Education","Income")
ann <- length(abal)    

#F-tests for predicting attrition
#Cited in the text in Appendix B
the.formula <- formula(paste("attrition ~",paste(abal ,collapse="+")))
print(summary(lm(the.formula,data=ed)))
the.formula <- formula(paste("attrition_admin ~",paste(abal ,collapse="+")))
print(summary(lm(the.formula,data=ed)))

#Compute difference in means for each baseline variable on survey attrition
outcomes <- abal 
attr_1 <- bind_rows(lapply(1:ann, FUN = est_attr, treat="attrition",std=T,the.data=ed))
rownames(attr_1)<-outcomes
print(round(attr_1[,-c(5:7)],4)) 

#Compute difference in means for each baseline variable on admin attrition
attr_2 <- bind_rows(lapply(1:ann, FUN = est_attr, treat="attrition_admin",std=T))
rownames(attr_2)<-outcomes
print(round(attr_2[,-c(5:7)],4))

# Hardly any imbalance one pre-treatment observables 
# between attrited and non-attrited

# Plot attrition balance
# Figure B.3 (Appendix)

pdf(file = "ReplicFigures/figure-B3.pdf",
    width = 7, height = 8)
par(mar=c(4,6,3,.5))
bal1 <-attr_1[,"Std. Estimate"]
bal2 <-attr_2[,"Std. Estimate"]
plot(bal1,-1*(1:nrow(attr_1)+0.05),bty="n"
     ,xlab="Standardized Mean Differences",ylab="",xlim=c(-.6,.6)
     ,yaxt="n",  pch=ifelse(attr_1[,4]>0.1,22,24)
     ,xpd=NA,main="Attrition Balance")
points(bal2,-1*(1:nrow(attr_1)-0.05), pch=ifelse(attr_1[,4]>0.1,22,24),bg=gray(0.5))
abline(v=c(-.25,0,.25),lty=c(2,1,2),col=c(gray(0.5),1,gray(0.5)))
axis(side=2,at=-1*1:nrow(attr_1),labels=abal.labels,tick=F,las=2,line=-2,xpd=NA,cex.axis=1.1)
legend(x="right",legend=c("Endline","Admin"),pt.bg=c(0,gray(0.5)),pch=c(22,22),bty="n",cex=0.9)
dev.off()

# Ineligibility ####################################################################
#TABLE C3
print(table(Ineligible=ed$ineligible,Attrition=ed$attrition))
reg_i <- lm_robust(ineligible~Z,se_type="stata",data=ed) 
print(xtable(summary(reg_i)$coefficients[,c(1,2,4)],digits=3
      ,caption="Ex-ante ineligibility across treatment conditions",label="tab:ineligibility:balance",
      type="latex",hline.after=c(-1,0,nrow(analytical.table),
      caption.placement="top",file=paste("ReplicFigures/tab-C3.tex",sep=""))))

# F-tests for predicting ineligibility
# Cited in text of Appendix C
the.formula <- formula(paste("ineligible ~",paste(abal ,collapse="+")))
print(summary(lm(the.formula,data=ed)))

# Balance on Selected Baseline vars ################################################
balvars <- c("lula1bin_b","lula2_b","ptIDbin_b","index_lulapt_b",
             "intpertrust_b","trust_fedgov_b","index_trustgovt_b"
             ,"intenttoformalize_survey_b"
             ,"expect_correct"
             ,"incomebracket_survey_b"#income brackets
             ,"female_survey_b"#sex
             ,"white_b"#race
            ,"ineligible")

ballabels <-  c("Lula (eval)","Lula (vote)","PT Support","Index Lula-PT",
                "Interp. Trust","Trust Fed. Gov.","Index Trust in Gov.",
                "Intent to Form.","Knowledge\n(Cost)",
                "Income","Female","White",
                "Ineligible")

# Balance up for plotting (standardized differences between treatments)
# FIGURE C.4
outcomes <- balvars
ed01 <- subset(ed,Z!=5)
ed01$Z <- factor(ed01$Z)
bal_1 <- bind_rows(lapply(1:length(balvars), FUN = est_attr, treat="Z", the.data=ed01, std=T))
ed05 <- subset(ed,Z!=1)
ed05$Z <- factor(ed05$Z)
bal_5 <- bind_rows(lapply(1:length(balvars), FUN = est_attr, treat="Z", the.data=ed05,std=T))
rownames(bal_1) <- rownames(bal_5) <- balvars 

pdf(file = "ReplicFigures/figure-C4.pdf",
    width = 7, height = 8)
par(mar=c(4,7,2,.5))
bal1 <-bal_1[,"Std. Estimate"]
bal5 <-bal_5[,"Std. Estimate"]
plot(bal1,-1*(1:nrow(bal_1)+0.05),bty="n"
     ,xlab="Standardized Mean Differences",ylab="",xlim=c(-.6,.6)
     ,yaxt="n",  pch=ifelse(bal_1[,4]>0.1,22,24),xpd=NA,main="")
points(bal5,-1*(1:nrow(bal_5)-0.05), pch=ifelse(bal_5[,4]>0.1,22,24),bg=gray(0.5))
abline(v=c(-.25,0,.25),lty=c(2,1,2),col=c(gray(0.5),1,gray(0.5)))
axis(side=2,at=-1*1:nrow(bal_1),labels=ballabels,tick=F,las=2,line=-2,xpd=NA,cex.axis=1.1)
legend(x="topright",legend=c("Main","Endorsement","(p < 0.1)"),pt.bg=c(0,gray(0.5),0),pch=c(22,22,24),bty="n",cex=0.9)
dev.off()

# Controls and Moderators  #######################################################

#Declare political moderators for Endorsement study
moderators <- c("lula1bin_b","lula2_b","ptIDbin_b","index_lulapt_b")
label.moderators <- c("Lula (eval)","Lula (vote)","PT Support","Index Lula-PT")

#Declare Trust moderators
moderators2 <- c("intpertrust_b","trust_fedgov_b") 
label.moderators2 <- c("Interp. Trust","Trust Fed. Gov.") 

#Declare controls
the.ctrls <- c("incomebracket_survey_b"#income brackets
               ,"female_survey_b"#sex
               ,"white_b"#race
               ,"bussize_b"#size of business
               ,"bussector_b"#sector of activity
               ,"bustype_b"#type of business (fixed store or street vendor)"
               ,"bolsafamilia_admin_b" #unplanned (bolsa familia participation)
) 

## Correlations between  the political moderators
# FIGURE F6a in appendix
library(corrgram)
tmp <- subset(ed,select=moderators)
pdf(file = "ReplicFigures/figure-F6a.pdf",
    width = 7, height = 8)
par(mar=c(3,3,1,1))
corrgram(tmp,lower.panel = panel.conf,   labels=label.moderators)
dev.off()

## Association of political moderators with pre-treatment characteristics:
# Mentioned in the text of appendix G
tmp01 <- cor.test(ed$index_lulapt_b,ed$schooling_survey_b)
tmp02 <- cor.test(ed$index_lulapt_b,ed$income_survey_b) 
cat("There is a negative correlation between  the Lula-PT support index and years of schooling and",
    " (r=",round(tmp01$est,3),", p<",0.001,") and income  (r=",round(tmp02$est,3),", p<",0.001,").\n",sep="")
#intent to formalize
tmp1 <- lm_robust(index_lulapt_b~intenttoformalize_survey_b,data=subset(ed,ineligible==0))
#knowledge (of costs)
tmp2 <- lm_robust(index_lulapt_b~expect_correct,data=subset(ed,ineligible==0))

cat("Those who reported having intented to formalize at baseline scored lower on the Lula-PT index (",
    round(tmp1$coef[2],3),", p=",
    round(tmp1$p.value[2],3),"), and so did those who reported some knowledge about the MEI ",
    "program at baseline (",
    round(tmp2$coef[2],3),", p=",
    round(tmp2$p.value[2],3),
    "), though this last variable was only measured for those who had considered formalizing.\n",sep="")

# Figure G8
# Standardized difference in means between eligible and ineligible
# Including a comparison elegibles/ineligibles ex-ante on partisanship 
# Request by the JOP R/R
# Here we use data from *all* treatment arms, as discussed in the appendix
dbrazil[dbrazil == -997|dbrazil == -998|dbrazil==-999] <- NA
  #Recode the lula support to binary variables, as we did in the ed object
  dbrazil$lula1bin_b <- car::recode(dbrazil$lula1_b,"c(-2,-1)=0;c(1,2)=1")
  dbrazil$ptIDbin_b <- car::recode(dbrazil$ptID_b,"c(0,1)=1;-1=0")
outcomes <-  c(abal,
               moderators) 
inn <- length(outcomes)
ineligibility <- bind_rows(lapply(1:inn, FUN = est_attr, treat="ineligible",std=T,the.data=dbrazil))
rownames(ineligibility )<-outcomes
print(round(ineligibility [,-c(5:7)],4)) 
ineligiblity.labels <- c(abal.labels,label.moderators)
pdf(file = "ReplicFigures/figure-G8.pdf",
    width = 7, height = 8)
par(mar=c(4,7,3,.5),mfrow=c(1,1))
bal <-ineligibility[,"Std. Estimate"]
plot(bal,-1*(1:nrow(ineligibility)+0.05),bty="n"
     ,xlab="Standardized Mean Differences",ylab="",xlim=c(-.6,.6)
     ,yaxt="n",  pch=ifelse(ineligibility[,4]>0.05,22,24)
     ,xpd=NA,main="Inegilibity Balance")
abline(v=c(-.25,0,.25),lty=c(2,1,2),col=c(gray(0.5),1,gray(0.5)))
abline(h=-11.5,lwd=2,col=gray(0.7))
axis(side=2,at=-1*1:nrow(ineligibility),labels=ineligiblity.labels,tick=F,las=2,line=-2,xpd=NA,cex.axis=1.1)
legend(x="bottomright"
       ,legend=c("p>=0.05","p<0.05"),pch=c(22,24),bty="n",cex=0.9
       ,title="Sig. of Diff.")
dev.off()

# Examine variation within ineligibles (using the whole sample)
# Table G7
# Did petistas ineligibles formalize earlier than non-petistas?  
# Answer: no
the.ineligibles <- subset(dbrazil,ineligible==T)
  the.ineligibles$days.formalized <- as.numeric(as.Date("2017-12-31")-as.Date(the.ineligibles$date_formalized))
  the.ineligibles$months.formalized <- the.ineligibles$days.formalized/30.41667
  regd2 <- lm_robust(months.formalized~lula1bin_b,se_type="stata",data=the.ineligibles)
  regd3 <- lm_robust(months.formalized~lula2_b,se_type="stata",data=the.ineligibles)
  regd4 <- lm_robust(months.formalized~ptIDbin_b,se_type="stata",data=the.ineligibles)
  regd1 <- lm_robust(months.formalized~index_lulapt_b,se_type="stata",data=the.ineligibles)
  regsd <- list(regd1,regd2,regd3,regd4)

#TABLE G7
print(texreg::texreg(regsd
               ,caption.above = T
               ,caption = "Days since formalization (ineligible only)"
               ,label = "tab:daysformal"
              ,custom.coef.names = c("Intercept",label.moderators[c(4,1,2,3)])
               ,ci.force = FALSE
              , file="ReplicFigures/tab-G7.tex")) #table in the appendix

# Declared a modified function to deal with data from the whole project
est_selection <- function(index, the.data=dbrazil,w=NULL,std=F){
    require("ANOVAreplication")
    #results_t <- as.data.frame(matrix(NA, 1, 8))
    the.data$groups <- ifelse(the.data$ineligible& the.data$index_lulapt_b>0,"ineligible-supporter",
                      ifelse(the.data$ineligible& the.data$index_lulapt_b<0,"ineligible-detractor",
                      ifelse(the.data$ineligible==F& the.data$index_lulapt_b>0,"eligible-supporter",
                      ifelse(the.data$ineligible==F&the.data$index_lulapt_b<0,"eligible-detractor",NA))))
    the.formula<-formula(paste(outcomes[index],"~ groups -1"))
    reg <- lm_robust(the.formula, data = the.data,alpha = 0.1,weights=w,se_type="stata")
    results_t <- list(summary(reg)$coef)
    names(results_t) <- outcomes[index]
    return(results_t)
}

#Figure G.9
outcomes <-  c(abal) 
inn <- length(outcomes)
selection <- lapply(1:inn, FUN = est_selection, std=F,the.data=dbrazil)
names(selection)<-c(abal.labels)
pdf(file = "ReplicFigures/figure-G9.pdf",
    width = 9, height = 9)
      x <- selection
      n <- length(x)
      par(mfrow=c(3,4),mar=c(4,2,4,1))
     for(i in 1:n){
      the.main <- names(x)[[i]]
      xx<-x[[i]][[1]]
      bgs <- c("white","black","white","black")
      pchs <- c(21,21,24,24)
      ylims <- c(NA,NA)
      ylims[1] <- floor(min(xx[,"CI Lower"]))
      ylims[2] <- ceiling(max(xx[,"CI Upper"]))
      if((ylims[1]==0|ylims[1]==-1)&ylims[2]==1){
        ylims[1] <- round(min(xx[,"CI Lower"]),1)-0.1
        ylims[2] <- round(max(xx[,"CI Upper"]),1)+0.1 
      }
      xs <- c(.8,1.2,1.8,2.2)
      plot(c(0.6,2.4),ylims,type="n",ylab="",xlab="",bty="n"
           ,xaxt="n",main=the.main,cex.main=1.4)
      axis(side=1,at=xs[1:2],labels=c("Detr.","Sup."),cex.axis=0.9,line=-.1)
      axis(side=1,at=xs[3:4],labels=c("Detr.","Sup."),cex.axis=0.9,line=-.1)
      axis(side=1,line=2,at=c(1,2),labels=c("Eligible\n(Informal)","Ineligible\n(Formal)"),tick=F)
      segments(x0=xs,x1=xs,
               y0=xx[,"CI Lower"],y1=xx[,"CI Upper"])
      points(xs,xx[,"Estimate"],bg=bgs,pch=pchs,cex=1.4)
     }
dev.off()

# Examine missingness in control variables (on non-attrited cases)
as.matrix(apply(is.na(subset(ed,select=the.ctrls
                             ,attrition==0|attrition_admin==0)),2,sum))

# Create  "missing" category for NA observations in control variables
# Make sure controls with more than two categories are treated as categorical variables
ed$incomebracket_survey_b[which(is.na(ed$incomebracket_survey_b))] <- -999
ed$incomebracket_survey_b <- factor(ed$incomebracket_survey_b)
ed$white_b[which(is.na(ed$white_b))] <- -999
ed$white_b <- factor(ed$white_b)
ed$bolsafamilia_admin_b[which(is.na(ed$bolsafamilia_admin_b))] <- -999
ed$bolsafamilia_admin_b <- factor(ed$bolsafamilia_admin_b)
ed$female_survey_b[which(is.na(ed$female_survey_b))] <- -999
ed$female_survey_b <- factor(ed$female_survey_b)
ed$bussize_b[which(is.na(ed$bussize_b))] <- -999
ed$bussize_b <- factor(ed$bussize_b)
ed$bussector_b[which(is.na(ed$bussector_b))] <- -999
ed$bussector_b <- factor(ed$bussector_b)
as.matrix(apply(is.na(subset(ed,select=the.ctrls
                             ,attrition==0|attrition_admin==0)),2,sum))

# Subset of outcomes in the PAP plus unplanned knowledge and trust outcomes
paper.outcomes <- c("intenttoformalize_mixed_e",
                    #"intenttoformalize_admin_e",
                    "formalize_admin_e",
                    "taxcompliance_admin_e",
                    "know_meigeneral_e",
                    "know_meifeatures_e"
)
label.outcomes <- c("Intent to Formalize",
                    #"Intent to Formalize (Admin)",
                    "Formalize",
                    "Formalize and Comply",
                    "Knowledge\n(General)",
                    "Knowledge\n(Features)"
                 #   "Trust in Sebrae",
                  #  "Trust in Fed. Goverment"
                    )

paper.baselines <- c(1,1
                     ,1,1,1
                    # "trust_sebrae_b",
                    # "trust_fedgov_b"
)
pn <- length(paper.outcomes)
print(data.frame(paper.outcomes,paper.baselines)) #check baselines are correctly associated wiht outcomes

## Eliminate ex-ante ineligibles (main analysis in paper)
ed.all <- ed
ed <- subset(ed,ineligible==0)

## Create table of descriptives statistics for paper
## TABLE 1
outcomes <- c(paper.outcomes,moderators,moderators2)
the.summary <- bind_rows(lapply(1:length(outcomes), FUN = my.summary, the.data=ed))
rownames(the.summary) <- c(label.outcomes,label.moderators,label.moderators2)
the.summaryx <- xtable(the.summary, digits=c(0,2,2,0,0,0,0))
  caption(the.summaryx) <- "Descriptive Statistics" 
  label(the.summaryx) <- "tab:descriptive" 
  print(the.summaryx,caption.placement = "top")
  print(the.summaryx,digits=3,caption="Descriptive Statistics"
               ,label="tab:descriptive",type="latex",hline.after=c(0,nrow(the.summaryx)),
        caption.placement="top",file=paste("ReplicFigures/tab-1.tex",sep=""))
  
# FIGURE F6b: Descriptive plot of the index of support for Lula and the PT
pdf(file = paste("ReplicFigures/figure-F6b.pdf",sep=""),
      width = 7, height = 8)
  par(mfrow=c(1,1),mar=c(4,4,1,1))
  hist(ed$index_lulapt_b
     ,main=""
     ,xlab="Index of Support for Lula and the PT")
dev.off()


## Analysis ####################################################################
## Estimation using only variables used in paper, but otherwise following PAP #
## Raw Diferences in Outcome # 
## Columns with "1" are a comparison of Z1 with Z0; with "5" compare Z5 with Z0
outcomes <- paper.outcomes
res_1 <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=ed))
rownames(res_1) <-  outcomes
print(round(res_1[,c(1:4,8:11)],4))

res_1i <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=ed.all))
rownames(res_1i) <-  outcomes
print(round(res_1i[,c(1:4,8:11)],4))


## (PAP 2) Raw Differences in Outcome by Pre-treatment Assessment of Lula # 
## Assessment of the Lula Moderation; 
outcomes <- paper.outcomes
nn <- length(outcomes)
baselines <- paper.baselines

### Using Lula-PT Index as political moderator
res_2pos <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=subset(ed,index_lulapt_b>=0)))
res_2neg <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=subset(ed,index_lulapt_b<0)))
rownames(res_2pos) <- rownames(res_2neg)  <- outcomes
#Raw differences for positive view of lula/pt
print(round(res_2pos[,c(1,2,4,8,9,11,15,16)],3))
#Raw differences for negative view of lula/pt
print(round(res_2neg[,c(1,2,4,8,9,11,15,16)],3))

### Using Lula-PT Index as political moderator 
### Keeping ineligibles
res_2ipos <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=subset(ed.all,index_lulapt_b>=0)))
res_2ineg <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=subset(ed.all,index_lulapt_b<0)))
rownames(res_2ipos) <- rownames(res_2ineg)  <- outcomes
#Raw diferences for positive view of lula/pt
print(round(res_2ipos[,c(1,2,4,8,9,11,15,16)],3))
#Raw diferences for negative view of lula/pt
print(round(res_2ineg[,c(1,2,4,8,9,11,15,16)],3))


### Using Lula Eval as political moderator
res_2pos2 <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=subset(ed,lula1bin_b==1)))
res_2neg2 <- bind_rows(lapply(1:length(outcomes), FUN = est_dm, the.data=subset(ed,lula1bin_b==0)))
rownames(res_2pos2) <- rownames(res_2neg2)  <- outcomes
#Raw diferences for positive view of lula/pt
print(round(res_2pos2[,c(1,2,4,8,9,11,15,16)],3))
#Raw diferences for negative view of lula/pt
print(round(res_2neg2[,c(1,2,4,8,9,11,15,16)],3))


# (PAP 3) ITT #
outcomes <- paper.outcomes
nn <- length(outcomes)
baselines <- paper.baselines
res_3b <-  bind_rows(lapply(1:nn, FUN = est_itt,fe=F,ctrls=NULL)) 
res_3c <-  bind_rows(lapply(1:nn, FUN = est_itt,fe=F,ctrls=c(the.ctrls))) 
rownames(res_3b) <- rownames(res_3c)  <-  outcomes 
print(round(res_3b[,c(1,2,4,8,9,11)],4))

#Robustness: keeping ineligibles
res_3bi <-  bind_rows(lapply(1:nn, FUN = est_itt,fe=F,the.data=ed.all,ctrls=c("ineligible"))) 
res_3ci <-  bind_rows(lapply(1:nn, FUN = est_itt,fe=F,the.data=ed.all,ctrls=c("ineligible",the.ctrls))) 
rownames(res_3bi) <- rownames(res_3ci)  <-  outcomes 
print(round(res_3bi[,c(1,2,4,8,9,11)],4))

# (PAP 4) Political Moderation #
# All outcomes are examined for each political moderator #
# Estimate models with ineligibility and with all PAP controls + ineligibility #
outcomes <- paper.outcomes
nn <- length(outcomes)
baselines <- paper.baselines

res_41c <- res_41d <- res_41ci <- res_41di <-list()#treatment T1 vs. T0
for(i in 1:length(moderators)){
  res_41c[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , FUN = est_mod2,fe=T
                                                , mod=moderators[i],treat="Z1"
                                                ,ctrls=NULL))
  res_41d[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , FUN = est_mod2,fe=F
                                                , mod=moderators[i],treat="Z1"
                                                ,ctrls=c(the.ctrls)))
  res_41ci[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , the.data = ed.all
                                                , FUN = est_mod2,fe=T
                                                , mod=moderators[i],treat="Z1"
                                                ,ctrls="ineligible"))
  res_41di[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , the.data = ed.all
                                                , FUN = est_mod2,fe=F
                                                , mod=moderators[i],treat="Z1"
                                                ,ctrls=c("ineligible",the.ctrls)))#,baselines
  
  }
res_41dall <- do.call("rbind",res_41d) #combine them all into a single data.frame
res_41call <- do.call("rbind",res_41c) #combine them all into a single data.frame
res_41dall$outcome <- res_41call$outcome <-outcomes
res_41dall$moderator <- gsub("\\.\\d\\d?","",rownames(res_41dall))
res_41call$moderator <- gsub("\\.\\d\\d?","",rownames(res_41call))
rownames(res_41dall)<-rownames(res_41call)<-NULL
# Now keeping ineligibles
res_41diall <- do.call("rbind",res_41di) #combine them all into a single data.frame
res_41ciall <- do.call("rbind",res_41ci) #combine them all into a single data.frame
res_41diall$outcome <- res_41ciall$outcome <-outcomes
res_41diall$moderator <- gsub("\\.\\d\\d?","",rownames(res_41diall))
res_41ciall$moderator <- gsub("\\.\\d\\d?","",rownames(res_41ciall))
rownames(res_41diall)<-rownames(res_41ciall)<-NULL
# Look at political moderation for Z1 v. Z0
print(res_41dall[,15:23])

# Political moderation for Z5 v. Z0
res_45c <- res_45d <- res_45ci <- res_45di <-list()
for(i in 1:length(moderators)){
  res_45c[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
             , FUN = est_mod2,fe=T
             , mod=moderators[i],treat="Z5"
             , ctrls=NULL))
  res_45d[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                               , FUN = est_mod2,fe=F
                                               , mod=moderators[i]
                                               , treat="Z5"
                                               , ctrls=c(the.ctrls)))
  res_45ci[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , the.data= ed.all
                                                , FUN = est_mod2,fe=T
                                                , mod=moderators[i],treat="Z5"
                                                , ctrls="ineligible"))
  res_45di[[moderators[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , the.data=ed.all
                                                , FUN = est_mod2,fe=F
                                                , mod=moderators[i]
                                                , treat="Z5"
                                                , ctrls=c("ineligible",the.ctrls)))#,baselines
  
  }
res_45call <- do.call("rbind",res_45c) #combine them all into a single data.frame
res_45dall <- do.call("rbind",res_45d) #combine them all into a single data.frame
res_45call$outcome <- res_45dall$outcome <-outcomes
res_45call$moderator <- gsub("\\.\\d\\d?","",rownames(res_45call))
res_45dall$moderator <- gsub("\\.\\d\\d?","",rownames(res_45dall))
rownames(res_45call)<-rownames(res_45dall)<-NULL
# Now keeping eligibles
res_45ciall <- do.call("rbind",res_45ci) #combine them all into a single data.frame
res_45diall <- do.call("rbind",res_45di) #combine them all into a single data.frame
res_45ciall$outcome <- res_45diall$outcome <-outcomes
res_45ciall$moderator <- gsub("\\.\\d\\d?","",rownames(res_45ciall))
res_45diall$moderator <- gsub("\\.\\d\\d?","",rownames(res_45diall))
rownames(res_45ciall)<-rownames(res_45diall)<-NULL

# Look at political moderation for Z5 v. Z0
print(res_45diall[,15:23])

# (NOT in PAP) Trust moderation ################
outcomes <- paper.outcomes
nn <- length(outcomes)
baselines <- paper.baselines

res_X1c <- res_X1d <- res_X1ci <- res_X1di <-list()#treatment T1 vs. T0
for(i in 1:length(moderators2)){
  res_X1c[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , FUN = est_mod2,fe=T
                                                , mod=moderators2[i],treat="Z1"
                                                ,ctrls=NULL))
  res_X1d[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , FUN = est_mod2,fe=F
                                                , mod=moderators2[i],treat="Z1"
                                                ,ctrls=c(the.ctrls)))#,baselines
  res_X1ci[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                 , the.data = ed.all
                                                 , FUN = est_mod2,fe=T
                                                 , mod=moderators2[i],treat="Z1"
                                                 ,ctrls="ineligible"))
  res_X1di[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                 , the.data = ed.all
                                                 , FUN = est_mod2,fe=F
                                                 , mod=moderators2[i],treat="Z1"
                                                 ,ctrls=c("ineligible",the.ctrls)))#,baselines
  
  }
res_X1dall <- do.call("rbind",res_X1d) #combine them all into a single data.frame
res_X1call <- do.call("rbind",res_X1c) #combine them all into a single data.frame
res_X1dall$outcome <- res_X1call$outcome <-outcomes
res_X1dall$moderator <- gsub("\\.\\d\\d?","",rownames(res_X1dall))
res_X1call$moderator <- gsub("\\.\\d\\d?","",rownames(res_X1call))
rownames(res_X1dall)<-rownames(res_X1call)<-NULL
# Now keeping ineligibles
res_X1diall <- do.call("rbind",res_X1di) #combine them all into a single data.frame
res_X1ciall <- do.call("rbind",res_X1ci) #combine them all into a single data.frame
res_X1diall$outcome <- res_X1ciall$outcome <-outcomes
res_X1diall$moderator <- gsub("\\.\\d\\d?","",rownames(res_X1diall))
res_X1ciall$moderator <- gsub("\\.\\d\\d?","",rownames(res_X1ciall))
rownames(res_X1diall)<-rownames(res_X1ciall)<-NULL
# Look at knowledge moderation for Z1 v. Z0
print(res_X1dall[,15:23])

# Knowledge moderation Z5 vs Z0
res_X5c <- res_X5d <-res_X5ci <- res_X5di <-list()
for(i in 1:length(moderators2)){
  res_X5c[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , FUN = est_mod2,fe=T
                                                , mod=moderators2[i],treat="Z5"
                                                , ctrls=NULL))
  res_X5d[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                , FUN = est_mod2,fe=F
                                                , mod=moderators2[i]
                                                , treat="Z5"
                                                , ctrls=c(the.ctrls)))#,baselines
  res_X5ci[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                 , the.data=ed.all
                                                 , FUN = est_mod2,fe=T
                                                 , mod=moderators2[i],treat="Z5"
                                                 , ctrls="ineligible"))
  res_X5di[[moderators2[i]]] <-  bind_rows(lapply(1:length(outcomes)
                                                 , the.data=ed.all
                                                 , FUN = est_mod2,fe=F
                                                 , mod=moderators2[i]
                                                 , treat="Z5"
                                                 , ctrls=c("ineligible",the.ctrls)))#,baselines
  
  }
res_X5call <- do.call("rbind",res_X5c) #combine them all into a single data.frame
res_X5dall <- do.call("rbind",res_X5d) #combine them all into a single data.frame
res_X5call$outcome <- res_X5dall$outcome <-outcomes
res_X5call$moderator <- gsub("\\.\\d\\d?","",rownames(res_X5call))
res_X5dall$moderator <- gsub("\\.\\d\\d?","",rownames(res_X5dall))
rownames(res_X5call)<-rownames(res_X5dall)<-NULL
# Keeping ineligibles
res_X5ciall <- do.call("rbind",res_X5ci) #combine them all into a single data.frame
res_X5diall <- do.call("rbind",res_X5di) #combine them all into a single data.frame
res_X5ciall$outcome <- res_X5diall$outcome <-outcomes
res_X5ciall$moderator <- gsub("\\.\\d\\d?","",rownames(res_X5ciall))
res_X5diall$moderator <- gsub("\\.\\d\\d?","",rownames(res_X5diall))
rownames(res_X5ciall)<-rownames(res_X5diall)<-NULL
# Look at knowledge moderation for Z5 v. Z0
print(res_X5dall[,15:23])


#  CACE (omitted from PAP)
outcomes <- paper.outcomes
nn <- length(outcomes)
baselines <- paper.baselines
res_5a <-  bind_rows(lapply(1:nn, FUN = est_cace,fe=T,the.data=ed)) #Cace equivalent to Dim
res_5b <-  bind_rows(lapply(1:nn, FUN = est_cace,fe=T,ctrls=NULL,the.data=ed)) 
res_5c <-  bind_rows(lapply(1:nn, FUN = est_cace,fe=T,ctrls=the.ctrls,baseline=F,the.data=ed)) #controls, baseline vals
res_5d <-  bind_rows(lapply(1:nn, FUN = est_cace,fe=F,ctrls=the.ctrls,baseline=T,the.data=ed)) #controls, baseline vals
rownames(res_5a) <- rownames(res_5b) <- rownames(res_5c) <- rownames(res_5d) <- outcomes 
print(round(res_5c[,-c(5:7)],4))


## Plots & Tables #########################################################
## First results are based on results dropping ineligibles
## Results that keep ineligibles and control for ineligibility are further below

## Combining two models from each treatment arm in the same figure in main body of paper
## Only PAP Formalization-related outcomes  
## 3B no controls , 3c includes controls
tp <- cbind(res_1[,-15],res_3c)[1:3,]
tp <- cbind(res_3b,res_3c)[1:3,] 
the.labels=label.outcomes[1:3]

## TABLE 2a 
analytical.table <- res_3b[,c(1,2,4,8,9,11,15)]
rownames(analytical.table) <- label.outcomes
  #Compute HB corrected p-values by hypothesis family
  analytical.table$padj.1 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).1"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).1"],"BH"))
  analytical.table$padj.5 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).5"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).5"],"BH"))

print(xtable(analytical.table[,c(1:3,8,4:6,9,7)]
        ,digits=3#digits=c(0,2,2,3,3,2,2,3,3,0)
        ,caption="Main intent-to-treat effects (without controls)",label="tab:itt"),
      type="latex",
      hline.after=c(0,3,nrow(analytical.table)),
      caption.placement="top",
      file=paste("ReplicFigures/tab-2a.tex",sep=""))

# TABLE 2b
analytical.table <- res_3c[,c(1,2,4,8,9,11,15)]
rownames(analytical.table) <- label.outcomes
    #Compute HB corrected p-values by hypothesis family
    analytical.table$padj.1 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).1"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).1"],"BH"))
    analytical.table$padj.5 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).5"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).5"],"BH"))

print(xtable(analytical.table[,c(1:3,8,4:6,9,7)]
               ,digits=3
               ,caption="Main intent-to-treat effects (with controls)",label="tab:itt:ctrls"),
        type="latex",
        hline.after=c(0,3,nrow(analytical.table)),
        caption.placement="top",
        file=paste("ReplicFigures/tab-2b.tex",sep=""))
  
# CACE - Tables and Figures 
tp <- cbind(res_5b,res_5c)[1:3,]
the.labels=label.outcomes[1:3]

# TABLE E6a
analytical.table <- res_5b[,c(1,2,4,8,9,11,15)]#without controls
rownames(analytical.table) <- label.outcomes
#Compute HB corrected p-values by hypothesis family
analytical.table$padj.1 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).1"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).1"],"BH"))
analytical.table$padj.5 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).5"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).5"],"BH"))

print(xtable(analytical.table[,c(1:3,8,4:6,9,7)]
             ,digits=3
             ,caption="Main CACE effects (without controls)",label="tab:itt"),
      type="latex",
      hline.after=c(0,3,nrow(analytical.table)),
      caption.placement="top",
      file=paste("ReplicFigures/tab-E6a.tex",sep=""))

# TABLE E6b
analytical.table <- res_5c[,c(1,2,4,8,9,11,15)]#without controls
rownames(analytical.table) <- label.outcomes
#Compute HB corrected p-values by hypothesis family
analytical.table$padj.1 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).1"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).1"],"BH"))
analytical.table$padj.5 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).5"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).5"],"BH"))

print(xtable(analytical.table[,c(1:3,8,4:6,9,7)]
             ,digits=3 
             ,caption="Main CACE effects (with controls)",label="tab:itt:ctrls"),
      type="latex",
      hline.after=c(0,3,nrow(analytical.table)),
      caption.placement="top",
      file=paste("ReplicFigures/tab-E6b.tex",sep=""))


### Political moderators on formalization outcomes: All in a single plot

#FIGURE 1
pdf(file = paste("ReplicFigures/figure-1.pdf",sep=""),
    width = 14, height = 4)
par(mfrow = c(1,4),
    oma = c(5,12.5,0,0) + 0.1,
    mar = c(1,.35,1,.35) + 0.1)
for(the.mod in moderators){
  tp <- cbind(subset(res_41dall,moderator==the.mod),
              subset(res_45dall,moderator==the.mod))[1:3,]
  estcol= sort(c(grep("mod.pel",colnames(tp)),grep("mod.peh",colnames(tp)))) #det-main, sup-main, det-endors, sup-endorse
  secol= sort(c(grep("mod.sel",colnames(tp)),grep("mod.seh",colnames(tp)))) #det-main, sup-main, det-endors, sup-endorse
  if(the.mod==moderators[1]){the.labels<-label.outcomes[1:3]}else{the.labels<-NA} 
  offsets <- c(.22,.1,-.1,-.22)
  ys <- -1:-nrow(tp)
  xs <- c(min(-.25,min(tp[,estcol]-1.64*tp[,secol])),
          max(.25,max(tp[,estcol]+1.64*tp[,secol])))
  plot(xs,c(0-0.5,-nrow(tp)-0.35)
       ,type="n",bty="n",yaxt="n"
       ,ylab="",xlab="",main=label.moderators[which(moderators==the.mod)],cex.main=1.3)
  abline(v=0,lty=1,col=gray(0.7))
  abline(h=ys-0.5,lty=3,col=gray(0.7))
  the.cols <- c(1,1,gray(0.5),gray(0.5))
  the.bgs <- c("white","white",1,1)
  the.pchs <- c(25,24,25,24)
  for(i in 1:length(estcol)){
    segments(x0= tp[,estcol[i]]-1.64*tp[,secol[i]],
             x1= tp[,estcol[i]]+1.64*tp[,secol[i]],
             y0=ys+offsets[i],
             y1=ys+offsets[i],col=the.cols[i])
    points(tp[,estcol[i]],ys+offsets[i],bg=the.bgs[i],pch=the.pchs[i],cex=1.2,col=the.cols[i])	
  }
}
title(xlab = "Treatment Effects Given Moderator",
      ylab = "",
      outer = TRUE, line = 2, cex=1.1)
axis(side=2,at=ys
     ,labels=label.outcomes[1:3]
     ,las=2,tick=F,cex.axis=1.4,outer = TRUE)
par(mfrow=c(1, 1), oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
legend(x="bottom",
       legend=c("Detractor (Main)","Supporter (Main)","Detractor (Endorsement)","Supporter (Endorsement)"),
       pch=the.pchs,col=the.cols,pt.bg=the.bgs,bty="n",lty=1,horiz=T
       ,cex=0.9,inset=-0.01,xpd=NA
)
dev.off()

### Political Moderators on Knowledge outcomes
### FIGURE F7 
pdf(file = paste("ReplicFigures/figure-F7.pdf",sep=""),
    width = 14, height = 4)
par(mfrow = c(1,4),
    oma = c(5,7,0,0) + 0.1,
    mar = c(1,.35,1,.35) + 0.1)
for(the.mod in moderators){
  tp <- cbind(subset(res_41dall,moderator==the.mod),
              subset(res_45dall,moderator==the.mod))[4:5,]
  estcol= sort(c(grep("mod.pel",colnames(tp)),grep("mod.peh",colnames(tp)))) #det-main, sup-main, det-endors, sup-endorse
  secol= sort(c(grep("mod.sel",colnames(tp)),grep("mod.seh",colnames(tp)))) #det-main, sup-main, det-endors, sup-endorse
  if(the.mod==moderators[1]){the.labels<-label.outcomes[4:5]}else{the.labels<-NA} 
  offsets <- c(.22,.1,-.1,-.22)
  ys <- -1:-nrow(tp)
  xs <- c(min(-.25,min(tp[,estcol]-1.64*tp[,secol])),
          max(.25,max(tp[,estcol]+1.64*tp[,secol])))
  plot(xs,c(0-0.5,-nrow(tp)-0.35)
       ,type="n",bty="n",yaxt="n"
       ,ylab="",xlab="",main=label.moderators[which(moderators==the.mod)])
  abline(v=0,lty=1,col=gray(0.7))
  abline(h=ys-0.5,lty=3,col=gray(0.7))
  the.cols <- c(1,1,gray(0.5),gray(0.5))
  the.bgs <- c("white","white",1,1)
  the.pchs <- c(25,24,25,24)
  for(i in 1:length(estcol)){
    segments(x0= tp[,estcol[i]]-1.64*tp[,secol[i]],
             x1= tp[,estcol[i]]+1.64*tp[,secol[i]],
             y0=ys+offsets[i],
             y1=ys+offsets[i],col=the.cols[i])
    points(tp[,estcol[i]],ys+offsets[i],bg=the.bgs[i],pch=the.pchs[i],cex=1.1,col=the.cols[i])	
  }
}
title(xlab = "Treatment Effects (Given Moderator)",
      ylab = "",
      outer = TRUE, line = 2)
axis(side=2,at=ys
     ,labels=label.outcomes[4:5]
     ,las=2,tick=F,cex.axis=1.3,outer = TRUE)
par(mfrow=c(1, 1), oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
legend(x="bottom",
        legend=c("Detractor (Main)","Supporter (Main)","Detractor (Endorsement)","Supporter (Endorsement)"),
        pch=the.pchs,col=the.cols,pt.bg=the.bgs,bty="n",lty=1,horiz=T
        ,cex=0.9,inset=-0.01,xpd=NA
       )
dev.off()


## FIGURE 2
## Trust moderators on political outcomes 
## This plot combined the political moderator and the trust moderators 
## for knowledge outcomes. It was conceived to save space in the JOP submission
moderators4 <- c("index_lulapt_b",moderators2[c(1,2)])
label.moderators4 <- c(label.moderators[4],label.moderators2[c(1,2)])
# The different moderators come from different objects
# Combine  them here ahead of the plot
tps <-list(cbind(subset(res_41dall,moderator==moderators4[1]),
                   subset(res_45dall,moderator==moderators4[1]))[4:5,],
           cbind(subset(res_X1call,moderator==moderators4[2]),
                 subset(res_X5call,moderator==moderators4[2]))[4:5,],
           cbind(subset(res_X1call,moderator==moderators4[3]),
                 subset(res_X5call,moderator==moderators4[3]))[4:5,])
names(tps) <- moderators4
pdf(file = paste("ReplicFigures/figure-2.pdf",sep=""),
    width = 12, height = 4.5)
par(mfrow = c(1,3),
    oma = c(5.7,7,0,0) + 0.1,
    mar = c(1,1,1,1) + 0.1)
for(the.mod in moderators4){
  tp <- tps[[the.mod]]
  estcol= sort(c(grep("mod.pel",colnames(tp)),grep("mod.peh",colnames(tp)))) #det-main, sup-main, det-endors, sup-endorse
  secol= sort(c(grep("mod.sel",colnames(tp)),grep("mod.seh",colnames(tp)))) #det-main, sup-main, det-endors, sup-endorse
  offsets <-  c(.22,.1,-.1,-.22)
  ys <- -1:-nrow(tp)
  xs <- c(min(-.25,min(tp[,estcol]-1.64*tp[,secol])),
          max(.25,max(tp[,estcol]+1.64*tp[,secol])))
  plot(xs,c(0-0.5,-nrow(tp)-0.35)
       ,type="n",bty="n",yaxt="n",cex.main=1.3
       ,ylab="",xlab="",main=label.moderators4[which(moderators4==the.mod)])
  abline(v=0,lty=1,col=gray(0.7))
  abline(h=ys-0.5,lty=3,col=gray(0.7))
  the.cols <- c(1,1,gray(0.5),gray(0.5))
  the.bgs <- c("white","white",1,1)
  the.pchs <- c(25,24,25,24)
  for(i in 1:length(estcol)){
    segments(x0= tp[,estcol[i]]-1.64*tp[,secol[i]],
             x1= tp[,estcol[i]]+1.64*tp[,secol[i]],
             y0=ys+offsets[i],
             y1=ys+offsets[i],col=the.cols[i])
    points(tp[,estcol[i]],ys+offsets[i],bg=the.bgs[i],pch=the.pchs[i],cex=1.3,col=the.cols[i])	
  }
}
title(xlab = "Treatment Effects Given Moderator",
      ylab = "",
      outer = TRUE, line = 1.75)
axis(side=2,at=ys
     ,labels=label.outcomes[4:5]
     ,las=2,tick=F,cex.axis=1.4,outer = TRUE)
par(mfrow=c(1, 1), oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
legend(x=0.01,y=0.06,#x="bottomleft",
       legend=c("Lula-PT Detractor (Main)","Lula-PT Supporter (Main)","Lula-PT Detractor (Endors.)","Lula-PT Supporter (Endors.)"),
       pch=the.pchs,col=the.cols,pt.bg=the.bgs,bty="n",lty=1,ncol=2
       ,cex=0.8,inset=-0.015,xpd=NA
)
legend(x=0.56,y=0.06,#x="bottomright",
       legend=c("Low Trust (Main)","High Trust (Main)","Low Trust (Endorsement)","High Trust (Endorsement)"),
       pch=the.pchs,col=the.cols,pt.bg=the.bgs,bty="n",lty=1,ncol=2
       ,cex=0.8,inset=-0.015,xpd=NA
)
dev.off()


## Report results keeping ineligibles and controlling for ineligibility
## Combining two models from each treatment arm in the same figure in main body of paper
## Only PAP Formalization-related outcomes  

## TABLE D4 
analytical.table <- res_3bi[ ,c(1,2,4,8,9,11,15)]
rownames(analytical.table) <- label.outcomes 
#Compute HB corrected p-values by hypothesis family
analytical.table$padj.1 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).1"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).1"],"BH"))
analytical.table$padj.5 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).5"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).5"],"BH"))

print(xtable(analytical.table[,c(1:3,8,4:6,9,7)]
             ,digits=3 
             ,caption="Main intent-to-treat effects (controlling for ineligibility)",label="tab:itt:ineligibles"),
      type="latex",
      hline.after=c(0,3,nrow(analytical.table)),
      caption.placement="top",
      file=paste("ReplicFigures/tab-D4.tex",sep=""))

## TABLE D5 
analytical.table <- res_3ci[,c(1,2,4,8,9,11,15)]
rownames(analytical.table) <- label.outcomes
#Compute HB corrected p-values by hypothesis family
analytical.table$padj.1 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).1"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).1"],"BH"))
analytical.table$padj.5 <- c(p.adjust(analytical.table[c(1,2,3),"Pr(>|t|).5"],"BH"),
                             p.adjust(analytical.table[c(4,5),"Pr(>|t|).5"],"BH"))

print(xtable(analytical.table[,c(1:3,8,4:6,9,7)]
             ,digits=3 
             ,caption="Main intent-to-treat effects (with all controls)",label="tab:itt:ctrls:ineligibles"),
      type="latex",
      hline.after=c(0,3,nrow(analytical.table)),
      caption.placement="top",
      file=paste("ReplicFigures/tab-D5.tex",sep=""))


# Pooled analysis ###########################################
# This looks at average effects, pooling Z1 and Z5 together
# Registered in PAP, Mentioned in the footnote 14 and 15

# Text in footnote 14
ed$Zcomb <- ed$Z!=0
regp1 <- lm_robust(intenttoformalize_mixed_e~Zcomb, data =  ed)
regp2 <-lm_robust(formalize_admin_e~Zcomb, data =  ed)
regp3 <-lm_robust(taxcompliance_admin_e~Zcomb, data =  ed)

cat("When pooling the two treatment arms together, ITT estimates for the three formalization related outcomes are ",
  round(regp1$coef[2],3)," (p=",
  round(regp1$p.value[2],3),") ",
  round(regp2$coef[2],3)," (p=",
  round(regp2$p.value[2],3),"), and ",
  round(regp3$coef[2],3)," (p=",
  round(regp3$p.value[2],3),"), respectively.\n",sep="")

# Text in footnote 15
regp4 <-lm_robust(formula(paste("know_meigeneral_e~Zcomb","+",paste(the.ctrls,collapse="+"))), data =  ed)
regp5 <-lm_robust(formula(paste("know_meifeatures_e~Zcomb","+",paste(the.ctrls,collapse="+"))), data =  ed)
cat("Pooled estimates of the ITT on general knowledge are  ",
    round(regp4$coef[2],3)," (p=",
    round(regp4$p.value[2],3),"), and ",
    round(regp5$coef[2],3)," (p=",
    round(regp5$p.value[2],3),") for knowledge of features of the program.\n",sep="")

### Exploratory analysis section #######################

# Stats in text in Section 5: Mentioned at the start of the substitution section
dimk1 <- difference_in_means(know_meigeneral_e~formalize_admin_e, data =  ed, condition1=0,condition2=1)
dimk2 <- difference_in_means(know_meifeatures_e~formalize_admin_e, data =  ed, condition1=0,condition2=1)
cat("Those who formalized had a ",
    round(dimk2$coef[1],2),"--",
    round(dimk1$coe[1],2),
    " greater probability of being classified as knowledgeable about the MEI program.\n",sep="")


##### Robustness: Fully interactive models for political moderation for knowledge ###
pred.data <- data.frame(Z=factor(c(0,5,0,5)),
                        index_lulapt_b=quantile(ed$index_lulapt_b,probs=c(0.1,0.1,0.9,0.9),na.rm=T),
                        incomebracket_survey_b=factor(rep(2,4)),
                        female_survey_b=factor(rep(1,4)),
                        white_b=factor(rep(0,4)),
                        bussize_b=factor(rep(0,4)),
                        bussector_b=factor(rep(1,4)),
                        bustype_b=rep(1,4),
                        bolsafamilia_admin_b=factor(rep(0,4)))

# The first is a simple single-interaction model with controls, as in our specification
the.single <- formula(paste("know_meigeneral_e~index_lulapt_b*Z+",paste(the.ctrls,collapse="+")))
single <- lm_robust(the.single,data=subset(ed,Z!=1))
out.single.general <- summary(single)$coef[c("Z5", "index_lulapt_b","index_lulapt_b:Z5"),]
out.meigeneral.single <- predict(single,pred.data,interval = "confidence")$fit

# The second is a  fully interacts the moderator with the entire set of controls, including the fixed effects. @BlaOls21 call this the *fully moderated model*.
the.fully <- formula(paste("know_meigeneral_e~index_lulapt_b*(Z+",paste(the.ctrls,collapse="+"),")"))
fully <- lm_robust(the.fully ,data=subset(ed,Z!=1))
out.fully.general <-summary(fully)$coef[c("Z5", "index_lulapt_b","index_lulapt_b:Z5"),]
out.meigeneral.fully <- predict(fully,pred.data,interval = "confidence")$fit


# The first is a simple single-interaction model with controls, as in our specification
the.single <- formula(paste("know_meifeatures_e~index_lulapt_b*Z+",paste(the.ctrls,collapse="+")))
single <- lm_robust(the.single,data=subset(ed,Z!=1))
out.single.features <- summary(single)$coef[c("Z5", "index_lulapt_b:Z5"),]
out.meifeatures.single <- predict(single,pred.data,interval = "confidence")$fit

# The second is a  fully interacts the moderator with the entire set of controls, including the fixed effects. @BlaOls21 call this the *fully moderated model*.
the.fully <- formula(paste("know_meifeatures_e~index_lulapt_b*(Z+",paste(the.ctrls,collapse="+"),")"))
fully <- lm_robust(the.fully ,data=subset(ed,Z!=1))
out.fully.features <- summary(fully)$coef[c("Z5", "index_lulapt_b:Z5"),]
out.meifeatures.fully <- predict(fully,pred.data,interval = "confidence")$fit

# FIGURE G10
# Plot coefficients (direct term and interaction)
pdf(file = paste("ReplicFigures/figure-G10.pdf",sep=""),
    width = 8, height = 4.5)
par(mfrow = c(1,2),
    oma = c(5.7,3,0,0) + 0.1,
    mar = c(1,1,1,1) + 0.1)
## The  treatment effect coeff
plot(c(.5,2.5),c(-0.5,0.5),type="n",bty="n"
     ,xlab="",xaxt="n",main="General Knowledge")
ys <- c(0.8,1.2,1.8,2.2)
abline(h=0,lty=3)
segments(y0=c(out.single.general["Z5","CI Lower"],
              out.fully.general["Z5","CI Lower"],
              out.single.general["index_lulapt_b:Z5","CI Lower"],
              out.fully.general["index_lulapt_b:Z5","CI Lower"]),
         y1=c(out.single.general["Z5","CI Upper"],
              out.fully.general["Z5","CI Upper"],
              out.single.general["index_lulapt_b:Z5","CI Upper"],
              out.fully.general["index_lulapt_b:Z5","CI Upper"]),
         x0=ys,x1=ys,lty=c(1,2,1,2))
points(ys,c(out.single.general["Z5","Estimate"],
            out.fully.general["Z5","Estimate"],
            out.single.general["index_lulapt_b:Z5","Estimate"],
            out.fully.general["index_lulapt_b:Z5","Estimate"]),
       pch=21,bg=c("white",1,"white",1),cex=1.5)

#axis(side=1,at=c(.75,1.15),labels=c("Single","Fully"),line=-1)
#axis(side=1,at=c(1.75,2.15),labels=c("Single","Fully"),line=-1)
axis(side=1,at=c(0.95,1.95),labels=c("Treat","Treat X Mod."),tick=F)
abline(v=1.5,col=gray(0.7),lwd=2)

### Second outcome
plot(c(.5,2.5),c(-0.5,0.5),type="n",bty="n"
     ,xlab="",xaxt="n",main="Features Knowledge")
#mtext("Predicted share of knowledgeable respondents",side=2,line=2.5,cex=1.2)
ys <- c(0.8,1.2,1.8,2.2)
abline(h=0,lty=3)
segments(y0=c(out.single.features["Z5","CI Lower"],
              out.fully.features["Z5","CI Lower"],
              out.single.features["index_lulapt_b:Z5","CI Lower"],
              out.fully.features["index_lulapt_b:Z5","CI Lower"]),
         y1=c(out.single.features["Z5","CI Upper"],
              out.fully.features["Z5","CI Upper"],
              out.single.features["index_lulapt_b:Z5","CI Upper"],
              out.fully.features["index_lulapt_b:Z5","CI Upper"]),
         x0=ys,x1=ys,lty=c(1,2,1,2))
points(ys,c(out.single.features["Z5","Estimate"],
            out.fully.features["Z5","Estimate"],
            out.single.features["index_lulapt_b:Z5","Estimate"],
            out.fully.features["index_lulapt_b:Z5","Estimate"]),
       pch=21,bg=c("white",1,"white",1),cex=1.5)
axis(side=1,at=c(0.95,1.95),labels=c("Treat","Treat X Mod."),tick=F)
abline(v=1.5,col=gray(0.7),lwd=2)

title(xlab = "Variables and specifications",
      ylab = "Estimates",
      outer = TRUE, line = 1.75, cex=1.2)
par(mfrow=c(1, 1), oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
legend(x="bottom",
       legend=c("Single interaction","Fully interacted"),
       pch=c(21,23),pt.bg="white",bty="n",lty=c(1,3),ncol=2
       ,cex=0.8,inset=0.01,xpd=NA,pt.cex=1.1
)
dev.off()

# FIGURE G11 : Predicted values
pdf(file = paste("ReplicFigures/figure-G11.pdf",sep=""),
    width = 8, height = 4.5)
par(mfrow = c(1,2),
    oma = c(5.7,3,0,0) + 0.1,
    mar = c(1,1,1,1) + 0.1)
## First knowledge outcome
plot(c(.5,2.5),c(0,1),type="n",bty="n",
     ylab= "Predicted share of knowledgeble respondents" 
  ,xlab="",xaxt="n",main="General Knowledge")
ys <- c(0.8,1.2,1.8,2.2)
segments(y0=out.meigeneral.single[,2],
         y1=out.meigeneral.single[,3],
         x0=ys,x1=ys)
points(ys,out.meigeneral.single[,1],pch=21,bg=c("white",1,"white",1),cex=1.5)

ys <- ys-0.1
segments(y0=out.meigeneral.fully[,2],
         y1=out.meigeneral.fully[,3],
         x0=ys,x1=ys,lty=2)
points(ys,out.meigeneral.fully[,1],pch=23,bg=c("white",1,"white",1),cex=1.5)

axis(side=1,at=c(.75,1.15),labels=c("Ctrl","Treat"),line=-1)
axis(side=1,at=c(1.75,2.15),labels=c("Ctrl","Treat"),line=-1)
axis(side=1,at=c(0.95,1.95),labels=c("Detractors","Supporters"),tick=F)
abline(v=1.5,col=gray(0.7),lwd=2)

## Second knowledge outcome
plot(c(.5,2.5),c(0,1),type="n",bty="n",ylab="",xlab="",xaxt="n",main="Features Knowledge")
ys <- c(0.8,1.2,1.8,2.2)
segments(y0=out.meifeatures.single[,2],
         y1=out.meifeatures.single[,3],
         x0=ys,x1=ys)
points(ys,out.meifeatures.single[,1],pch=21,bg=c("white",1,"white",1),cex=1.5)

ys <- ys-0.1
segments(y0=out.meifeatures.fully[,2],
         y1=out.meifeatures.fully[,3],
         x0=ys,x1=ys,lty=2)
points(ys,out.meifeatures.fully[,1],pch=23,bg=c("white",1,"white",1),cex=1.5)

axis(side=1,at=c(.75,1.15),labels=c("Ctrl","Treat"),line=-1)
axis(side=1,at=c(1.75,2.15),labels=c("Ctrl","Treat"),line=-1)
axis(side=1,at=c(0.95,1.95),labels=c("Detractors","Supporters"),tick=F)
abline(v=1.5,col=gray(0.7),lwd=2)

title(xlab = "Treatment condition and political preferences",
      ylab = "Predicted share of knowledgeable respondents",
      outer = TRUE, line = 1.75, cex=1.2)
par(mfrow=c(1, 1), oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
plot(0:1, 0:1, type="n", xlab="", ylab="test", axes=FALSE)
legend(x="bottom",
       legend=c("Single interaction","Fully interacted"),
       pch=c(21,23),pt.bg="white",bty="n",lty=c(1,3),ncol=2
       ,cex=0.8,inset=0.01,xpd=NA,pt.cex=1.1
)
dev.off()

# Mediation analysis #########################################################################
# (Unplanned) Analysis of Knowledge a Mediator of Formalization 
  outcomes <- paper.outcomes[1:3]
  res_medgen <- bind_rows(lapply(1:length(outcomes)
              , FUN = est_mediation ,the.data=ed
              , ctrls=the.ctrls
              , mediator="know_meigeneral_e")) 
  rownames(res_medgen) <- label.outcomes[1:3]
  
  # TABLE H8
  res_medgenx <- xtable(res_medgen,digits=c(0,3,3,3,3,3,3,3,3))
  caption(res_medgenx) <- "Causal Mediation Analysis (Information as Mediator)"
  label(res_medgenx) <- "tab:medgen"
  print(res_medgenx,caption.placement="top", type="latex",hline.after=c(-1,0,nrow(res_medgenx)),file="ReplicFigures/tab-H8.tex")
  
  res_medfeat <- bind_rows(lapply(1:length(outcomes), FUN = est_mediation ,the.data=ed, mediator="know_meifeatures_e")) 
  rownames(res_medfeat) <- outcomes
  
  ## Stats mentioned in text in Appendix H
  ## IV estimate of Z -> information -> knowledge (pooled sample)
  ed$Zcomb <- ed$Z!=0
  iv_robust(intenttoformalize_mixed_e ~ know_meigeneral_e | Zcomb, se_type="stata", fixed_effects= block, data = ed)
  iv_robust(formalize_admin_e ~ know_meigeneral_e | Zcomb, se_type="stata", fixed_effects= block, data = ed)
  iv_robust(taxcompliance_admin_e ~ know_meigeneral_e | Zcomb, se_type="stata", fixed_effects= block, data = ed)

  # Mediation analysis #########################################################################
  # (Unplanned)  Moderated-mediation of lula eval-knowledge
  outcomes <- paper.outcomes[1:3]
  res_modmedgen5 <- bind_rows(lapply(1:length(outcomes), FUN = est_modmediation, treat="Z5"
                                     ,the.data=ed, mediator="know_meigeneral_e", moderator="lula1bin_b")) 
  rownames(res_modmedgen5) <- label.outcomes[1:3]
  
  outcomes <- paper.outcomes[1:3]
  res_modmedgen1 <- bind_rows(lapply(1:length(outcomes), FUN = est_modmediation, treat="Z1"
                                     ,the.data=ed, mediator="know_meigeneral_e", moderator="lula1bin_b")) 
  rownames(res_modmedgen1) <- label.outcomes[1:3]
  
  ### TABLE H9 (combines both of these tables, below)
  res_modmedgenx1 <- xtable(res_modmedgen1[,c(1,2,7,8,9,10,15,16)],digits=c(0,3,3,3,3,3,3,3,3))
  caption(res_modmedgenx1) <- "Moderated Causal Mediation Analysis (General Information as Mediator)"
  label(res_modmedgenx1) <- "tab:modmedgen1"
  print(res_modmedgenx1,caption.placement="top",type="latex",hline.after=c(-1,0,nrow(res_modmedgenx1)),file="ReplicFigures/tab-H9a.tex")

  res_modmedgenx5 <- xtable(res_modmedgen5[,c(1,2,7,8,9,10,15,16)],digits=c(0,3,3,3,3,3,3,3,3))
  caption(res_modmedgenx5) <- "Moderated Causal Mediation Analysis (General Information as Mediator)"
  label(res_modmedgenx5) <- "tab:modmedgen5"
  print(res_modmedgenx5,caption.placement="top",type="latex",hline.after=c(-1,0,nrow(res_modmedgenx5)),file="ReplicFigures/tab-H9a.tex")
  
## TABLE A1
## Sample by treatment condition and neighborhood
the.tab <- table(ed$nhood_name,ed$Z)
the.tab <- cbind(key=c(2,1.2,1.3,1.1,3,4,5,6,7,8,9,1.4,10,11,12,13),
                 the.tab)
the.tab <- the.tab[order(the.tab[,1]),]
the.tab <- xtable(the.tab,digits=0)
caption(the.tab) <- "Sample by Treatment Condition and Neighborhood"
label(the.tab) <- "fig-sampleN"
print(the.tab,
      type="latex",
      hline.after=c(0,1,nrow(the.tab)),
      caption.placement="top",
      file=paste("ReplicFigures/tab-A1.tex",sep=""))